---
title: "Import the 2019 Istanbul election results from the manually assembled spreadsheet"
author: "Milan Svolik"
date: "Last Updated: April 20, 2023"
output: pdf_document
---

```{r setup, include=FALSE}
# Clear memory
rm(list = ls(all = TRUE))

# Load libraries
library(tidyverse)
library(stargazer)
library(lmtest)
library(reshape2)
library(foreign)
library(ggplot2)
library(estimatr)

# Display code but not warnings along with its results in the knitted file
knitr::opts_chunk$set(echo = TRUE, warning = F, message = F)
options(scipen=999)

set.seed(5877)
```

# Import the 2019 Istanbul election results from the excel file 
* The excel file was constructed by hand copying ballot-box level election results district-by-district from the website of the Turkish Electoral Commission (YSK):  https://sonuc.ysk.gov.tr/sorgu
```{R}
library("readxl")
df_march <- read_excel("data/2019 Istanbul Mayoral Election, Complete Results.xlsx", sheet = "Ballot Box Level, March")
df_june <- read_excel("data/2019 Istanbul Mayoral Election, Complete Results.xlsx", sheet = "Ballot Box Level, June")
```

## CREATE A UNIQUE BALLOT BOX, NEIGHBORHOOD, AND DISTRICT IDENTIFIER
* There is a unique result per each ballot box
```{r}
# CHECK THAT THERE IS A UNIQUE RESULT FOR EACH BALLOT BOX PER ROUND: YES
df_march %>% 
  group_by(district_name, neighborhood_name, box_number) %>% 
  count(month) %>% 
  filter(n !=1)

df_june %>% 
  group_by(district_name, neighborhood_name, box_number) %>% 
  count(month) %>% 
  filter(n !=1)

# CREATE BALLOT BOX  IDENTIFIERS 
df_march <- df_march %>% 
  arrange(district_name, neighborhood_name, box_number) %>% 
  mutate(box_id=row_number())

df_june <- df_june %>% 
  arrange(district_name, neighborhood_name, box_number) %>% 
  mutate(box_id=row_number())

summary(df_march$box_id)
summary(df_june$box_id)

# CREATE NEIGHBORHOOD IDENTIFIERS 
df_march <- df_march %>% 
  arrange(district_name, neighborhood_name, box_number) %>% 
  group_by(district_name, neighborhood_name) %>%
  mutate(neighborhood_id=cur_group_id())

df_june <- df_june %>% 
  arrange(district_name, neighborhood_name, box_number) %>% 
  group_by(district_name, neighborhood_name) %>%
  mutate(neighborhood_id=cur_group_id())

summary(df_march$neighborhood_id)
summary(df_june$neighborhood_id)

# CREATE DISTRICT IDENTIFIERS 
df_march <- df_march %>% 
  arrange(district_name, neighborhood_name, box_number) %>% 
  group_by(district_name) %>%
  mutate(district_id=cur_group_id())

df_june <- df_june %>% 
  arrange(district_name, neighborhood_name, box_number) %>% 
  group_by(district_name) %>%
  mutate(district_id=cur_group_id())

summary(df_march$district_id)
summary(df_june$district_id)
```
## CHECK IF THE DATA AGGREGATES UP TO THE CITY LEVEL 
* Everything adds up

### March 2019
* Official results: 
  - Total Number of Chests:	31,186
  - Number of Registered Voters in Ballots:	10,570,939
  - Number of Registered Voters:	10,560,963
  - Number of Voters:	8,865,072
  - Election Participation Rate:	83.94%		
  - Valid Votes:	8,549,822
  - Number of Invalid Votes:	315,250
  - AKP:	4,156,036
  - CHP: 4,169,765
```{R}
df_march %>% 
  ungroup() %>%
  count()

df_march %>% 
  ungroup() %>%
  summarize(sum(pop_vote), sum(total_vote), sum(AKP), sum(CHP))
```
### June 2019
* Official results: 
  - Total Number of Chests:	31,186
  - Number of Registered Voters in Ballots:	10,570,354	
  - Number of Voters:	8,925,166
  - Number of Registered Voters:	10,560,963		
  - Election Participation Rate:	84.51%		
  - Valid Votes:	8.746.566
  - Number of Invalid Votes:	178,600
  - AKP: 3,936,068
  - CHP: 4,742,082
```{R}
df_june %>% 
  ungroup() %>%
  count()

df_june %>% 
  ungroup() %>%
  summarize(sum(pop_vote), sum(total_vote), sum(AKP), sum(CHP))
```

## CHECK DATA FOR AMINISTRATIVE IRREGULARITIES
*	In March, there are 53 ballot boxes that show negative abstention rates (i.e. the number of total votes is equal or exceeds the number of registered voters )
*	In June, there are 52 such ballot boxes
* These are mostly detention facilities in which the number of "registered voters" fluctuates from day to day
```{R}
df_march %>%
  mutate(ABS=pop_vote - total_vote) %>%
  filter(ABS==0)

df_march %>%
  mutate(ABS=pop_vote - total_vote) %>%
  filter(ABS<0)

df_june %>%
  mutate(ABS=pop_vote - total_vote) %>%
  filter(ABS==0)

df_june %>%
  mutate(ABS=pop_vote - total_vote) %>%
  filter(ABS<0)

df_march %>%
  mutate(ABS=pop_vote - (AKP + CHP)) %>%
  filter(ABS==0)

df_march %>%
  mutate(ABS=pop_vote - (AKP + CHP)) %>%
  filter(ABS<0)

df_june %>%
  mutate(ABS=pop_vote - (AKP + CHP)) %>%
  filter(ABS==0)

df_june %>%
  mutate(ABS=pop_vote - (AKP + CHP)) %>%
  filter(ABS<0)
```
### CREATE A DUMMY FOR IRREGULAR BALLOT BOXES
* Detention facilities box numbers start with 9
  - 62 total in March & June
```{r}
df_march %>%
  filter(box_number>=9000)

df_june %>%
  filter(box_number>=9000)

df_march <- df_march %>%
  mutate(regular="yes") %>%
  mutate(regular = ifelse(pop_vote - total_vote<0, "error", regular)) %>%
  mutate(regular = ifelse(box_number>=9000, "prison", regular)) 

table(df_march$regular)

df_june <- df_june %>%
  mutate(regular="yes") %>%
  mutate(regular = ifelse(pop_vote - total_vote<0, "error", regular)) %>%
  mutate(regular = ifelse(box_number>=9000, "prison", regular)) 

table(df_june$regular)
```

## Create a variable that records those who voted for THIRD PARTIES/CANDIDATES
  - March: 224021
  - June: 68416
```{R}
colnames(df_march)
df_results_march <- df_march %>% 
  mutate(THIRD = SAADET + BTP + TKP + `VATAN PARTY` + DP + DSP + `OZGE AKMAN` + `DURSUNALI BACIOGLU` + `VEDAT OZTURK` + `HUSEYIN KARABULUT` + `AHMET CORDUK` + `MUHAMMET ALI CANCA` + `MEHMET YILDIZ`  + `GULDES ONKOYUN` + `HASAN ATASOY TORUN` + `MEMET ALI AYDOGMUS` + `LUTFU YILMAZ` + `SECKIN ILKER` +  `HALEF ALP` + `OZKAN MUSTAFA KUCUKKURAL` + `FATMA RAGIBE KANIKURU LOGOGLU` + `HAZER ORUC KAYA` + `BURHAN EROL` + `DOGAN DUMAN` + `AYSEL TEKEREK` + `ALI RIZA KANSIZ` + `ABDULCELIL GULAP` +  `ORUC KARACIK` + `MEHMET CELAL BAYKARA` + `BURAK KADIOGLU`)

table(df_results_march$THIRD)
summary(df_results_march$THIRD)

df_results_march %>%
  ungroup %>%
  summarize(sum(THIRD))

colnames(df_june)
df_results_june <- df_june %>% 
  mutate(THIRD = SAADET + `VATAN PARTISI` + `DURSUNALI BACIOGLU` + `VEDAT OZTURK` + `AHMET CORDUK` + `MUHAMMET ALI CANCA` + `MEHMET YILDIZ` + `GULDES ONKOYUN` +  `HASAN ATASOY TORUN` + `MEMET ALI AYDOGMUS` + `LUTFU YILMAZ` + `SECKIN ILKER` + `OZKAN MUSTAFA KUCUKKURAL` + `FATMA RAGIBE KANIKURU LOGOGLU` + `BURHAN EROL` +  `DOGAN DUMAN` +  `ALI RIZA KANSIZ` + `ABDULCELIL GULAP` + `BURAK KADIOGLU`)

table(df_results_june$THIRD)
summary(df_results_june$THIRD)

df_results_june %>%
  ungroup %>%
  summarize(sum(THIRD))
```

## KEEP ONLY THE COLUMNS RELEVANT FOR THE TWO-PARTY ANALYSIS AND MERGE
* drop all irregular ballot boxes (85 total)
  - complete: 31186
  - filtered: 31101
```{R}
df_results_march <- df_results_march %>% 
  select(district_name, district_id, neighborhood_name, neighborhood_id, box_id, pop_vote, AKP, CHP, THIRD, regular) %>%
  rename(registered_march=pop_vote, AKP_march=AKP, CHP_march=CHP, THIRD_march=THIRD) %>%
  mutate(ABS_march=registered_march - (AKP_march + CHP_march + THIRD_march))

df_results_june <- df_results_june %>% 
  select(district_name, district_id, neighborhood_name, neighborhood_id, box_id, pop_vote, AKP, CHP, THIRD, regular) %>%
  rename(registered_june=pop_vote, AKP_june=AKP, CHP_june=CHP, THIRD_june=THIRD) %>%
  mutate(ABS_june=registered_june - (AKP_june + CHP_june + THIRD_june))
  
df_results <- df_results_march %>%
  inner_join(df_results_june, by = "box_id")

# CHECK THAT THE MERGE WORKED OUT CORRECTLY
sum(df_results$district_name.x != df_results$district_name.y)
sum(df_results$district_id.x != df_results$district_id.y)
sum(df_results$neighborhood_name.x != df_results$neighborhood_name.y)
sum(df_results$neighborhood_id.x != df_results$neighborhood_id.y)
summary(df_results$box_id)
```

## DROP IRREGULAR BALLOT BOXES
```{R}
df_results %>% 
  count()

df_results <- df_results %>%
  filter(regular.x=="yes" & regular.y=="yes")
```

## KEEP ONLY THE VARIABLES THAT WILL BE USED IN THE ANALYSIS
```{R}
df_results <- df_results %>%
  select(district_name.x, district_id.x, neighborhood_name.x, neighborhood_id.x, box_id, AKP_march, CHP_march, ABS_march, THIRD_march, AKP_june, CHP_june, ABS_june, THIRD_june) %>%
  rename(district_name=district_name.x, district_id=district_id.x, neighborhood_name=neighborhood_name.x, neighborhood_id=neighborhood_id.x)
```

## RE-CHECK HOW THE DATA AGGREGATES UP TO THE CITY LEVEL 
* Everything adds up, accounting for having dropped irregular ballot boxes
* Note that the number of neighborhoods drops to 956 
```{r}
df_results %>% 
  distinct(district_id) %>%
  count()

df_results %>%
  distinct(neighborhood_id) %>%
  count()

df_results %>% 
  count()
```


### March 2019
* Official results: 
  - Total Number of Chests:	31,186
  - Number of Registered Voters in Ballots:	10,570,939
  - Number of Registered Voters:	10,560,963
  - Number of Voters:	8,865,072
  - Election Participation Rate:	83.94%		
  - Valid Votes:	8,549,822
  - Number of Invalid Votes:	315,250
  - AKP:	4,156,036
  - CHP: 4,169,765
```{R}
df_results %>% 
  count()

df_results %>% 
  ungroup() %>%
  summarize(sum(AKP_march), sum(CHP_march), sum(THIRD_march), sum(ABS_march))

df_results %>% 
  ungroup() %>%
  summarize(sum(AKP_march), sum(CHP_march), sum(THIRD_march), sum(ABS_march)) %>%
  sum()
```
### June 2019
* Official results: 
  - Total Number of Chests:	31,186
  - Number of Registered Voters in Ballots:	10,570,354	
  - Number of Voters:	8,925,166
  - Number of Registered Voters:	10,560,963		
  - Election Participation Rate:	84.51%		
  - Valid Votes:	8.746.566
  - Number of Invalid Votes:	178,600
  - AKP: 3,936,068
  - CHP: 4,742,082
```{R}
df_results %>% 
  ungroup() %>%
  count()

df_results %>% 
  ungroup() %>%
  summarize(sum(AKP_june), sum(CHP_june), sum(THIRD_june), sum(ABS_june))

df_results %>% 
  ungroup() %>%
  summarize(sum(AKP_june), sum(CHP_june), sum(THIRD_june), sum(ABS_june)) %>%
  sum()
```


# SAVE
```{R}
save(df_results, file = "data/df_2019istanbul_results.RData")
```
